gvc_agora_opentargets

Set up environment

library(tidyverse)
library(janitor)
library(broom)
library(readxl)
library(jsonlite)

library(gprofiler2)

# Use the black-and-white ggplot2 theme as the default for plots drawn below.
theme_set(theme_bw())

# Fix the random seed so that any stochastic step is reproducible.
set.seed(666)

Read and prep data

GVC

Genes within a 1 Mb window of the GVC loci, provided by Fanny (TODO: confirm whether the window extends 1 Mb on each side of the locus):

# Load the GVC 1 Mb comparison workbook, normalise the column names, split the
# version suffix off the gene IDs and discard the columns not used downstream.
gvc <- read_xlsx("GVC_1Mb_comparison_050224.xlsx") |>
  clean_names() |>
  separate(col = gene_id, into = c("gene_id", "version")) |>
  select(!c(version, agora_nominated_list, opentarget_info))

gvc

# One row per gene ID (the first occurrence wins), ordered by gene symbol.
gvc.genes <- gvc |>
  select(gene_id, gene_symbol) |>
  distinct(gene_id, .keep_all = TRUE) |>
  arrange(gene_symbol)

gvc.genes

Agora

Alzheimer’s disease gene prioritization scores from Agora:

# Agora overall gene scores, parsed from JSON and converted to a tibble.
ago1 <- as_tibble(
  read_json("syn25741025.overall_scores.json", simplifyVector = TRUE)
)

ago1

Alzheimer’s disease genes (Agora) from Fanny:

# Agora gene list (CSV); used here only to restrict the score table.
ago2 <- read_csv("AMPAD_agora_032124_gene-list.csv")
ago2

# Keep the scored genes whose HGNC symbol appears in that list.
ago <- semi_join(ago1, ago2, by = join_by(hgnc_symbol == `Gene Symbol`))

Open Targets

Alzheimer’s disease gene prioritization scores from Open Targets:

# Open Targets association table; the literal "No data" is read as missing.
ot <- read_tsv(
  file = "OT-MONDO_0004975-associated-targets-6_4_2024-v24_03.tsv",
  na = "No data",
  show_col_types = FALSE
)

ot

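Add Ensembl gene IDs by converting the Open Targets gene symbols with g:Profiler (`gconvert`), so the table can be joined to the other sources by Ensembl ID: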

# Convert every Open Targets gene symbol to Ensembl gene IDs with g:Profiler
# and carry the original Open Targets columns along. With mthreshold = Inf a
# symbol may map to several Ensembl IDs, in which case its Open Targets row is
# repeated once per ID.
otcols <- colnames(ot)
otensg <- gconvert(
  query = ot$symbol,
  organism = "hsapiens",
  target = "ENSG",
  mthreshold = Inf,
  filter_na = TRUE
) |>
  # input_number is matched against the row number of `ot`, which
  # rownames_to_column() yields as a character string, hence the cast.
  mutate(input_number = as.character(input_number)) |>
  left_join(
    ot |> rownames_to_column(var = "input_number"),
    by = "input_number"
  ) |>
  # otcols is an external character vector: wrap it in all_of() because
  # passing such a vector bare to select() is deprecated in tidyselect.
  select(ensembl_gene_id = target, all_of(otcols))

otensg

Annotate GVC genes with Agora and Open Targets scores

# Number of GVC genes that are present in the Agora table
gvc.genes |> filter(gene_id %in% ago$ensembl_gene_id) |> nrow()
[1] 116
# Number of GVC genes that are present in the Open Targets table
gvc.genes |> filter(gene_id %in% otensg$ensembl_gene_id) |> nrow()
[1] 405

Arrange by Agora’s genetics_score and Open Targets’ otGeneticsPortal:

# Attach the Agora and Open Targets columns to every GVC gene (genes without
# a match keep NA in those columns), then rank by the two genetics scores,
# highest first.
d1 <- gvc.genes |>
  left_join(ago, by = c("gene_id" = "ensembl_gene_id")) |>
  left_join(otensg, by = c("gene_id" = "ensembl_gene_id")) |>
  arrange(desc(genetics_score), desc(otGeneticsPortal))

d1

Overlap between GVC genes and Agora and Open Targets genes

# Gene ID sets to compare, one per source
x <- list(
  GVC = gvc.genes$gene_id,
  Agora = ago$ensembl_gene_id,
  OpenTargets = otensg$ensembl_gene_id
)

library(VennDiagram)

# Build the diagram as a grid object (filename = NULL) and draw it on a
# fresh page.
grid.newpage()
v <- venn.diagram(x, filename = NULL, fill = c("#FF0000", "#00FF00", "#0000FF"))
grid.draw(v)

# Break the three gene sets into their Venn partitions.
p <- get.venn.partitions(x)
p

# Genes present in all three sets, annotated with both score tables and ranked
# by the two genetics scores. The partition is selected through its logical
# membership columns (one per set) instead of comparing ..set.. with a
# hand-typed intersection label, which depends on the file encoding and on how
# VennDiagram formats that label.
overlap.genes <- p |>
  unnest(..values..) |>
  filter(GVC, Agora, OpenTargets) |>
  select(gene_id = ..values..) |>
  left_join(ago, by = join_by(gene_id == ensembl_gene_id)) |>
  left_join(otensg, by = join_by(gene_id == ensembl_gene_id)) |>
  arrange(desc(genetics_score), desc(otGeneticsPortal))

overlap.genes

Perform over-representation analysis (ORA) of overlap genes

# Unique gene symbols of the overlap genes, kept in the order of overlap.genes
# (sorted by genetics scores), since the query is submitted as ordered.
query <- unique(overlap.genes$symbol)

gostres <- gost(
  query = query,
  organism = "hsapiens",
  ordered_query = TRUE,
  exclude_iea = TRUE,
  significant = TRUE,
  user_threshold = 0.005,
  correction_method = "fdr",
  domain_scope = "annotated"
)

# Show the term name, ID and source first, followed by all other columns.
gostres$result |> relocate(term_name, term_id, source)
gostplot(gostres, capped = FALSE, interactive = TRUE)

Perform ORA of GVC genes sorted by Agora and Open Targets genetics scores

# Unique gene symbols of the GVC genes, kept in the order of d1 (sorted by
# genetics scores), since the query is submitted as ordered.
# `symbol` comes from the Open Targets table, and d1 is built with left joins,
# so it is NA for every GVC gene without an Open Targets match; drop those so
# that no missing value is sent as a query gene.
# NOTE(review): this restricts the query to GVC genes found in Open Targets.
# If all GVC genes are intended, use gene_symbol or gene_id instead - confirm.
query <- d1 |>
  filter(!is.na(symbol)) |>
  distinct(symbol) |>
  pull(symbol)

gostres <- gost(query = query,
                organism = "hsapiens",
                domain_scope = "annotated",
                exclude_iea = TRUE,
                ordered_query = TRUE,
                significant = TRUE,
                user_threshold = 0.005,
                correction_method = "fdr")

# Show the term name, ID and source first, followed by all other columns.
gostres$result |> select(term_name, term_id, source, everything())
gostplot(gostres, capped = FALSE, interactive = TRUE)

Perform correlation analysis of GVC genes using Agora and Open Targets scores

# Genetics evidence: Agora genetics_score vs Open Targets otGeneticsPortal.
# Row counts before and after dropping genes that lack either score, then the
# Kendall rank correlation on the complete pairs.
nrow(d1)
[1] 1345
nrow(drop_na(d1, genetics_score, otGeneticsPortal))
[1] 56
d1 |>
  drop_na(genetics_score, otGeneticsPortal) |>
  with(cor.test(genetics_score, otGeneticsPortal, method = "kendall")) |>
  tidy()
# Overall evidence: Agora target_risk_score vs Open Targets globalScore,
# handled the same way.
nrow(d1)
[1] 1345
nrow(drop_na(d1, target_risk_score, globalScore))
[1] 75
d1 |>
  drop_na(target_risk_score, globalScore) |>
  with(cor.test(target_risk_score, globalScore, method = "kendall")) |>
  tidy()

Overall correlation between Agora and Open Targets

# All Agora genes with their Open Targets columns attached (NA where there is
# no match), ranked by the two genetics scores, highest first.
d2 <- ago |>
  left_join(otensg, by = join_by(ensembl_gene_id)) |>
  arrange(desc(genetics_score), desc(otGeneticsPortal))

d2
# Genetics evidence across all Agora genes: row counts before and after
# dropping genes that lack either score, then the Kendall rank correlation.
nrow(d2)
[1] 926
nrow(drop_na(d2, genetics_score, otGeneticsPortal))
[1] 75
d2 |>
  drop_na(genetics_score, otGeneticsPortal) |>
  with(cor.test(genetics_score, otGeneticsPortal, method = "kendall")) |>
  tidy()
# Overall evidence across all Agora genes, handled the same way.
nrow(d2)
[1] 926
nrow(drop_na(d2, target_risk_score, globalScore))
[1] 484
d2 |>
  drop_na(target_risk_score, globalScore) |>
  with(cor.test(target_risk_score, globalScore, method = "kendall")) |>
  tidy()
# Genetics evidence within the three-way overlap: row counts before and after
# dropping genes that lack either score, then the Kendall rank correlation.
nrow(overlap.genes)
[1] 75
nrow(drop_na(overlap.genes, genetics_score, otGeneticsPortal))
[1] 56
overlap.genes |>
  drop_na(genetics_score, otGeneticsPortal) |>
  with(cor.test(genetics_score, otGeneticsPortal, method = "kendall")) |>
  tidy()